Documentation for the analysis of mouse placental RNA-seq data at e7.5, e8.5 and e9.5.
Load essential files + functions + libraries
tpm2 <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/tpmForClustering.txt", header = T)
summ <- function(trans_cluster, tpm2=tpm2) {
tpm3 <- tpm2
tpm3 <- cbind(rownames(tpm2), tpm3)
rownames(tpm3) <- 1:nrow(tpm3)
colnames(tpm3)[1] <- "transcripts"
#e7.5
e7.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E7.5_2", "E7.5_3", "E7.5_4", "E7.5_5", "E7.5_6")]))
e7.5_table$time <- c("e7.5")
rownames(e7.5_table) <- 1:nrow(e7.5_table)
colnames(e7.5_table) <- c("transcripts", "mean_cts_scaled", "time")
#e8.5
e8.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E8.5_1", "E8.5_2", "E8.5_3", "E8.5_4", "E8.5_5", "E8.5_6")]))
e8.5_table$time <- c("e8.5")
rownames(e8.5_table) <- 1:nrow(e8.5_table)
colnames(e8.5_table) <- c("transcripts", "mean_cts_scaled", "time")
#e9.5
e9.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E9.5_1", "E9.5_2", "E9.5_3", "E9.5_4", "E9.5_5")]))
e9.5_table$time <- c("e9.5")
rownames(e9.5_table) <- 1:nrow(e9.5_table)
colnames(e9.5_table) <- c("transcripts", "mean_cts_scaled", "time")
summary <- rbind(rbind(e7.5_table, e8.5_table), e9.5_table)
summary <- summary[order(summary$transcripts),]
summary <- inner_join(summary, trans_cluster, by = c("transcripts" = "name"))
summary <- inner_join(summary, t2g, by = c("transcripts" = "target_id"))
return(summary)
}
plotClus <- function(summary, title){
ascl2 <- subset(summary, summary$ext_gene == "Ascl2") #e7.5
gjb5 <- subset(summary, summary$ext_gene == "Gjb5") #e7.5
dnmt1 <- subset(summary, summary$ext_gene == "Dnmt1") #e8.5
itga4 <- subset(summary, summary$ext_gene == "Itga4") #e8.5
gjb2 <- subset(summary, summary$ext_gene == "Gjb2") #e9.5
igf2 <- subset(summary, summary$ext_gene == "Igf2") #e9.5
p <- ggplot(aes(time, mean_cts_scaled), data = summary) +
geom_line(aes(group = transcripts), alpha = 0.5, colour = "grey77") +
geom_line(stat = "summary", fun = "median", size = 2,
aes(group = 1, color = "Group median")) +
labs(title = title,
x = "Time point",
y = "Scaled mean transcript counts", color = "Legend", linetype = "Legend") +
theme(plot.title = element_text(size = 15, face = "bold"), legend.text=element_text(size=20)) +
geom_line(data = ascl2, size = 2.5, aes(group = transcripts, color = "Ascl2", linetype = "Ascl2"), alpha = 1) + #e7.5
geom_line(data = gjb5, size = 2, aes(group = transcripts, color = "Gjb5", linetype = "Gjb5" ), alpha = 1) + #e7.5
geom_line(data = dnmt1, size = 2, aes(group = transcripts, color = "Dnmt1", linetype = "Dnmt1"), alpha = 1) + #e8.5
geom_line(data = itga4, size = 2, aes(group = transcripts, color = "Itga4", linetype = "Itga4"), alpha = 1) + #e8.5
geom_line(data = gjb2, size = 2, aes(group = transcripts, color = "Gjb2", linetype = "Gjb2"), alpha = 1) + #e9.5
geom_line(data = igf2, size = 2, aes(group = transcripts, color = "Igf2", linetype = "Igf2"), alpha = 1) + #e9.5
scale_color_manual(name = "Legend", values = c("Ascl2" = "darkolivegreen4", "Gjb5" = "yellow3",
"Dnmt1" = "dodgerblue4", "Itga4" = "deepskyblue4",
"Gjb2" = "saddlebrown", "Igf2" = "salmon3",
"Group median" = "grey22")) +
scale_linetype_manual(name = "Legend", values = c("Ascl2" = "solid", "Gjb5" = "solid",
"Dnmt1" = "twodash", "Itga4" = "solid",
"Gjb2" = "solid", "Igf2" = "solid",
"Group median" = "solid")) +
guides(linetype=F,
colour=guide_legend(keywidth = 3, keyheight = 1)) +
theme(text = element_text(size=15),
legend.text=element_text(size=15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(angle=0, hjust=0.5, size = 25),
axis.text.y = element_text(size = 15)) +
facet_grid(cols = vars(value))
return(p)
}
library("ggplot2", suppressMessages())
library("dplyr", suppressMessages())
library("tidyverse", suppressMessages())
t2g <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/t2g.txt", header = T, sep = "\t")
t2g <- t2g[order(t2g$target_id),]
coding <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/Mus_musculus_grcm38_coding_transcripts.txt", header = F)
hc <- hclust(dist(tpm2, method = "euclidean"), "complete")
dend <- as.dendrogram(hc)
trans_cluster <- cutree(hc, k = 3) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3
#> 8091 7238 8242
summK3 <- summ(trans_cluster, tpm2)
p <- plotClus(summK3, "Hierarchical Clustering of Transcripts, k = 3")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 4) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4
#> 8091 7238 4501 3741
summK4 <- summ(trans_cluster, tpm2)
p <- plotClus(summK4, "Hierarchical Clustering of Transcripts, k = 4")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 5) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5
#> 8091 7238 4501 2532 1209
summK5 <- summ(trans_cluster, tpm2)
p <- plotClus(summK5, "Hierarchical Clustering of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 6) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5 6
#> 4833 3258 7238 4501 2532 1209
summK6 <- summ(trans_cluster, tpm2)
p <- plotClus(summK6, "Hierarchical Clustering of Transcripts, k = 6")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 7) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5 6 7
#> 4833 3258 7238 4501 1349 1209 1183
summK7 <- summ(trans_cluster, tpm2)
p <- plotClus(summK7, "Hierarchical Clustering of Transcripts, k = 7")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 8) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5 6 7 8
#> 4833 3258 7238 2429 1349 1209 1183 2072
summK8 <- summ(trans_cluster, tpm2)
p <- plotClus(summK8, "Hierarchical Clustering of Transcripts, k = 8")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 9) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5 6 7 8 9
#> 2838 3258 7238 2429 1349 1995 1209 1183 2072
summK9 <- summ(trans_cluster, tpm2)
p <- plotClus(summK9, "Hierarchical Clustering of Transcripts, k = 9")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p
trans_cluster <- cutree(hc, k = 10) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3 4 5 6 7 8 9 10
#> 2838 1173 7238 2429 1349 2085 1995 1209 1183 2072
summK10 <- summ(trans_cluster, tpm2)
p <- plotClus(summK10, "Hierarchical Clustering of Transcripts, k = 10")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p